#load packages
library(RADstackshelpR)
library(gridExtra)

Step 1: Demultiplex each sample using 'process_radtags'.

Step 2: Samples with a demultiplexed file size of < 2 megabytes simply don't have enough information to be relevant downstream, and can be removed right away. (Note: this will help ensure that there are enough loci retained at an 80% completeness cutoff to make informed optimization decisions)

Step 3: Iterate over values of m ranging from 3-7, while leaving all other parameters at default values.

Step 4: Visualize the output of these 5 runs and determine the optimal value for m.

#optimize_m function will generate summary stats on your 5 iterative runs
#input can be full path to each file or just the file name if your working directory contains the files
m.out<-optimize_m(m3="/Users/devder/Desktop/hipposideros/m_3.vcf",
           m4="/Users/devder/Desktop/hipposideros/m_4.vcf",
           m5="/Users/devder/Desktop/hipposideros/m_5.vcf",
           m6="/Users/devder/Desktop/hipposideros/m_6.vcf",
           m7="/Users/devder/Desktop/hipposideros/m_7.vcf")

#visualize the effect of varying m on the depth of each sample
vis_depth(output = m.out)
## [1] "Visualize how different values of m affect average depth in each sample"
## Picking joint bandwidth of 9.53

#visualize the effect of varying m on the number of SNPs retained
vis_snps(output = m.out, stacks_param = "m")
## Visualize how different values of m affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes number of SNPs retained at an 80% completeness cutoff.
## Picking joint bandwidth of 7190

#visualize the effect of varying m on the number of loci retained
vis_loci(output = m.out, stacks_param = "m")
## Visualize how different values of m affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
## Picking joint bandwidth of 3420

#3 is the optimal m value, and will be used next to optimize M

Step 5: Iterate over values of M ranging from 1-8, setting m to the optimal value (here 3).

Step 6: Visualize the output of these 8 runs and determine the optimal value for M.

#optimize M
M.out<-optimize_bigM(M1="/Users/devder/Desktop/hipposideros/M1.vcf",
           M2="/Users/devder/Desktop/hipposideros/M2.vcf",
           M3="/Users/devder/Desktop/hipposideros/M3.vcf",
           M4="/Users/devder/Desktop/hipposideros/M4.vcf",
           M5="/Users/devder/Desktop/hipposideros/M5.vcf",
           M6="/Users/devder/Desktop/hipposideros/M6.vcf",
           M7="/Users/devder/Desktop/hipposideros/M7.vcf",
           M8="/Users/devder/Desktop/hipposideros/M8.vcf")

#visualize the effect of varying M on the number of SNPs retained
vis_snps(output = M.out, stacks_param = "M")
## Visualize how different values of M affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes number of SNPs retained at an 80% completeness cutoff.
## Picking joint bandwidth of 6090

#visualize the effect of varying M on the number of polymorphic loci retained
vis_loci(output = M.out, stacks_param = "M")
## Visualize how different values of M affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
## Picking joint bandwidth of 2920

Step 7: Iterate over values of n ranging from M-1, M, M+1, setting m and M to the optimal values (here 3 and 2, respectively).

Step 8: Visualize the output of these 3 runs and determine the optimal value for n.

#optimize n
n.out<-optimize_n(nequalsMminus1="/Users/devder/Desktop/hipposideros/n1.vcf",
           nequalsM="/Users/devder/Desktop/hipposideros/n2.vcf",
           nequalsMplus1="/Users/devder/Desktop/hipposideros/n3.vcf")

#visualize the effect of varying n on the number of SNPs retained
vis_snps(output = n.out, stacks_param = "n")
## Visualize how different values of n affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes number of SNPs retained at an 80% completeness cutoff.
## Picking joint bandwidth of 7420

#visualize the effect of varying n on the number of polymorphic loci retained
vis_loci(output = n.out, stacks_param = "n")
## Visualize how different values of n affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
## Picking joint bandwidth of 3230

Finally, make a single figure showing the optimization process of all three parameters

gl<-list()
gl[[1]]<-vis_depth(output = m.out)
## [1] "Visualize how different values of m affect average depth in each sample"
gl[[2]]<-vis_snps(output = m.out, stacks_param = "m")
## Visualize how different values of m affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes number of SNPs retained at an 80% completeness cutoff.
gl[[3]]<-vis_loci(output = m.out, stacks_param = "m")
## Visualize how different values of m affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
gl[[4]]<-vis_snps(output = M.out, stacks_param = "M")
## Visualize how different values of M affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes number of SNPs retained at an 80% completeness cutoff.
gl[[5]]<-vis_loci(output = M.out, stacks_param = "M")
## Visualize how different values of M affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
gl[[6]]<-vis_snps(output = n.out, stacks_param = "n")
## Visualize how different values of n affect number of SNPs retained.
## Density plot shows the distribution of the number of SNPs retained in each sample,
## while the asterisk denotes number of SNPs retained at an 80% completeness cutoff.
gl[[7]]<-vis_loci(output = n.out, stacks_param = "n")
## Visualize how different values of n affect number of polymorphic loci retained.
## Density plot shows the distribution of the number of loci retained in each sample,
## while the asterisk denotes number of loci retained at an 80% completeness cutoff. The optimal value is denoted by red color.
grid.arrange(
  grobs = gl,
  widths = c(1,1,1,1,1,1),
  layout_matrix = rbind(c(1,1,2,2,3,3),
                        c(4,4,4,5,5,5),
                        c(6,6,6,7,7,7))
)
## Picking joint bandwidth of 9.53
## Picking joint bandwidth of 7190
## Picking joint bandwidth of 3420
## Picking joint bandwidth of 6090
## Picking joint bandwidth of 2920
## Picking joint bandwidth of 7420
## Picking joint bandwidth of 3230